home *** CD-ROM | disk | FTP | other *** search
- Path: xanth!cs.odu.edu!Amiga-Request
- From: Amiga-Request@cs.odu.edu (Amiga Sources/Binaries Moderator)
- Newsgroups: comp.sources.amiga
- Subject: v90i159: pattern.c - pattern matching routine from disksalv, Part01/01
- Message-ID: <12363@xanth.cs.odu.edu>
- Date: 28 Apr 90 18:49:25 GMT
- Sender: tadguy@cs.odu.edu
- Reply-To: daveh@cbmvax.uucp (Dave Haynie)
- Lines: 409
- Approved: tadguy@cs.odu.edu (Tad Guy)
- X-Mail-Submissions-To: Amiga@cs.odu.edu
- X-Post-Discussions-To: comp.sys.amiga
-
- Submitted-by: daveh@cbmvax.uucp (Dave Haynie)
- Posting-number: Volume 90, Issue 159
- Archive-name: examples/pattern.c/part01
-
- This is the stand-alone version of a pattern matching routine I wrote,
- originally for DiskSalv. It matches the full 1.3 AmigaDOS pattern
- language, not just the #? metacharacters handled by some of the pattern
- routines included with the various compilers on the market. And the
- actual pattern functions, CompilePattern() and MatchPattern(), compile
- to under 2k of object code.
-
- This code is completely original, and I'm making it public domain to
- encourage its use in other programs. Even commercial programs. I didn't
- research the problem, I just attacked it, so I can't swear there isn't a
- more efficient way to handle this stuff. But it does seem to work.
-
- This all compiled under Lattice C, but confuses the global optimizer as
- of Lattice V5.02. It could probably be ported to Manx, or UNIX for
- that matter -- it's not real system specific.
-
- -Dave Haynie
-
- #!/bin/sh
- # This is a shell archive. Remove anything before this line, then unpack
- # it by saving it into a file and typing "sh file". To overwrite existing
- # files, type "sh file -c". You can also feed this as standard input via
- # unshar, or by typing "sh <file", e.g.. If this archive is complete, you
- # will see the following message at the end:
- # "End of archive 1 (of 1)."
- # Contents: pattern.c
- # Wrapped by tadguy@xanth on Sat Apr 28 14:48:53 1990
- PATH=/bin:/usr/bin:/usr/ucb ; export PATH
- if test -f 'pattern.c' -a "${1}" != "-c" ; then
- echo shar: Will not clobber existing file \"'pattern.c'\"
- else
- echo shar: Extracting \"'pattern.c'\" \(10896 characters\)
- sed "s/^X//" >'pattern.c' <<'END_OF_FILE'
- X/* ====================================================================== */
- X
- X/*
- X AmigaDOS Pattern Matching Demo
- X March 20, 1990
- X
- X by Dave Haynie
- X
- X Released to the Public Domain
- X
- X This is the AmigaDOS Pattern Matcher I developed for DiskSalv
- X V1.40. So far, it seems to work just fine, and certainly works
- X better than any public domain code I managed to dig up, most
- X of which was limited to the '?' and '#' characters instead of
- X full AmigaDOS patterns.
- X
- X This will compile under Lattice V5.02 with the command:
- X
- X lc -L pattern.c
- X
- X The Lattice global optimizer seems to have trouble with it, at
- X least with the V5.02 system.
- X
- X With the function prototypes removed, I suspect it would be a
- X simple port to the Manx V3.6a compiler.
- X
- X I'd certainly appreciate any feedback on this code, especially
- X if there are any patterns it doesn't correctly match.
- X*/
- X
- X#include <exec/types.h>
- X#include <string.h>
- X#include <stdio.h>
- X#include <setjmp.h>
- X
- X/* ====================================================================== */
- X
- X/* These are my allocator routines, slight variations on the C library
- X routines. The pattern matching code was designed to use the DiskSalv
- X safe chunky allocator, and rather than re-write the code to use
- X something else, I provide some basically equivalent routines below.
- X The pattern compiler allocates string memory, and works best with a
- X memory allocator that can be cleaned up at program's end (eg, a plain
- X AllocMem() here isn't a good idea). */
- X
- Xjmp_buf lowmem; /* Don't forget to initialize this! */
- X
- X/* This is the basic safe allocator; it always returns a valid pointer.
- X The pattern matcher counts on the memory returned having been zeroed
- X out! */
- X
- Xchar *salloc(LONG size) {
- X char *ptr;
- X
- X if (!(ptr = (char *)malloc(size))) longjmp(lowmem,-1);
- X setmem(ptr,size,'\0');
- X return ptr;
- X}
- X
- X/* This is just for compatibility purposes. */
- X
- Xvoid sfree(char *ptr) {
- X free(ptr);
- X}
- X
- X/* ====================================================================== */
- X
- X/* This is the definition of a pattern element. The pattern compiler will
- X create an array of these elements from a string passed to it. */
- X
- Xtypedef struct {
- X char *aux; /* Auxilary data; string or paren-count */
- X BYTE type; /* Corresponding character type */
- X} pattern;
- X
- X/* The types, for the "special" array. */
- X
- X#define pSTRING 0 /* Plain old string */
- X#define pALT 1 /* The alternation character */
- X#define pREP 2 /* The repeat character */
- X#define pANY 3 /* The match-any character */
- X#define pNULL 4 /* The match-null character */
- X#define pSUB 5 /* A subpattern */
- X#define pEND 6 /* End of the pattern. */
- X
- X/* This is the definition of a pattern matching function, for use in the
- X "MatchSub()" coroutine. */
- X
- Xtypedef BOOL (*patfunc)(pattern *, char *);
- X
- X/* ====================================================================== */
- X
- X/* This section contains the pattern compiler code. */
- X
- X/* This function takes in a string from which one parenthesis has been
- X stripped. It returns a pointer to the position of the matching
- X parenthesis, or NULL if it isn't found. */
- X
- Xstatic char *FindParen(char *str) {
- X short parencnt = 1;
- X
- X while (*str) {
- X switch (*str) {
- X case '(' :
- X ++parencnt;
- X break;
- X case ')' :
- X if (--parencnt == 0) return str;
- X break;
- X default :
- X break;
- X }
- X ++str;
- X }
- X return NULL;
- X}
- X
- X/* This function creates a pattern for later pattern matching. It checks
- X syntax as well, since we don't want invalid patterns to possibly crash
- X the machine or otherwise cause trouble. This is a destructive compile,
- X in that it trashes the input string. */
- X
- Xpattern *CompilePattern(char *str) {
- X pattern *pat, *result = NULL;
- X short i = 0, j, tlen, plen = strlen(str)+1;
- X char fch, nch, *sub, *tmp;
- X
- X if (!str) return NULL;
- X
- X pat = (pattern *)salloc(sizeof(pattern)*plen);
- X while (fch = *str++) {
- X nch = *str;
- X switch (fch) {
- X case '(' : /* The start of a group */
- X if (nch == '|' || nch == ')') goto fail;
- X pat[i].type = pSUB;
- X if (!(str = FindParen(sub = str))) goto fail;
- X *str++ = '\0';
- X if (!(pat[i].aux = (char *)CompilePattern(sub))) goto fail;
- X break;
- X case ')' : /* We should never see the end of a group */
- X goto fail;
- X case '|' : /* Alternation */
- X if (nch == '|' || nch == ')') goto fail;
- X pat[i].type = pALT;
- X break;
- X case '#' : /* Repeatition */
- X if (nch == '#' || nch == '|' || nch == ')' || !nch) goto fail;
- X pat[i].type = pREP;
- X switch (nch) {
- X case '(' :
- X case '%' :
- X case '?' :
- X break;
- X case '\047':
- X nch = *str++;
- X default:
- X pat[++i].type = pSTRING;
- X pat[i].aux = (char *)salloc(2);
- X pat[i].aux[0] = nch;
- X str++;
- X break;
- X }
- X break;
- X case '?' : /* One of anything */
- X pat[i].type = pANY;
- X break;
- X case '%' : /* One of nothing */
- X pat[i].type = pNULL;
- X break;
- X default: /* Normal characters */
- X pat[i].type = pSTRING;
- X tmp = (char *)salloc((long)(tlen = strlen(str)+2));
- X tmp[0] = fch;
- X j = 1;
- X while (*str && !strchr("()|#?%",*str)) {
- X if (*str == '\047') ++str;
- X tmp[j++] = *str++;
- X }
- X tmp[j] = '\0';
- X strupr(strcpy(pat[i].aux = salloc(j+1),tmp));
- X sfree(tmp);
- X break;
- X }
- X ++i;
- X }
- X pat[i++].type = pEND;
- X result = (pattern *)memcpy(salloc(sizeof(pattern)*i),(char *)pat,sizeof(pattern)*i);
- X
- Xfail:
- X sfree((char *)pat);
- X return result;
- X}
- X
- X/* ====================================================================== */
- X
- X/* This is the actual pattern matching code. This code hasn't been really
- X optimized or anything; it's highly recursive, but it's designed to be
- X as nice to your stack as possible. There are four coroutines in my
- X pattern matcher. The top-level routine "MatchPattern()" takes in a
- X pattern and a string, and knows only how to subdivide an alternated
- X pattern and pass that on the the single pattern matcher, "MatchOne()".
- X That routine knows how to match strings, NULLs, and ANYs. It calls
- X "MatchRepeat()" to handle the repeat pattern, and "MatchSub()" to handle
- X a sub-pattern. Similarly, the "MatchRepeat()" routine knows how to match
- X repeated strings, NULLs, and ANYs. It also calls the "MatchSub()" routine
- X to handle repeated subpatterns. The "MatchSub()" splits the input string
- X into strings for the subpattern and parent pattern to match, and works
- X through these until a match is found or we've tried all possibilities.
- X It calls "MatchPattern()" on a simple subpattern, or "MatchRepeat()" on a
- X repeated subpattern. */
- X
- X/* Forward Declarations */
- X
- Xstatic BOOL MatchOne(pattern *, char *);
- Xstatic BOOL MatchSub(pattern *, pattern *, char *, patfunc);
- Xstatic BOOL MatchRepeat(pattern *, char *);
- X BOOL MatchPattern(pattern *, char *);
- X
- X/* This function matches a subpattern, where "sub" is the subpattern,
- X "pat" is the rest of the pattern at the current level, and "str"
- X is the usual string. The string splitting uses dynamic allocation
- X to avoid stack overflows as much as possible, the "subob" function
- X is the particular pattern matching function to use on the subpattern. */
- X
- Xstatic BOOL MatchSub(pattern *sub, pattern *pat, char *str, patfunc subop) {
- X short pos, len = strlen(str)+1;
- X char *copy = salloc(strlen(str)+1);
- X
- X for (pos = 0; pos < len; pos++) {
- X if ((*subop)(sub,copy) && MatchOne(pat,str)) {
- X sfree(copy);
- X return TRUE;
- X }
- X copy[pos] = *str++;
- X }
- X sfree(copy);
- X return FALSE;
- X}
- X
- X/* This function matches a repeat pattern. It returns the part of the
- X string not matched, or NULL if the whole string was matched. */
- X
- Xstatic BOOL MatchRepeat(pattern *pat, char *str) {
- X pattern *local = pat++;
- X
- X switch (local->type) {
- X case pSTRING :
- X while (TRUE) {
- X if (MatchPattern(pat,str)) return TRUE;
- X if (strnicmp(str,local->aux,strlen(local->aux))) break;
- X str += strlen(local->aux);
- X }
- X break;
- X case pANY :
- X do {
- X if (MatchPattern(pat,str)) return TRUE;
- X } while (*++str);
- X if (pat[0].type == pEND || pat[0].type == pALT) return TRUE;
- X break;
- X case pNULL :
- X break;
- X case pSUB :
- X if (MatchPattern(pat,str)) return TRUE;
- X return MatchSub((pattern *)local->aux,pat,str,MatchRepeat);
- X case pEND :
- X break;
- X }
- X return MatchOne(pat,str);
- X}
- X
- X/* This function tries to match a single pattern against the given
- X string. It is up to the calling function to worry about alternation. */
- X
- Xstatic BOOL MatchOne(pattern *pat, char *str) {
- X if (!str) return FALSE;
- X
- X while (pat->type != pEND && pat->type != pALT) {
- X switch (pat->type) {
- X case pSTRING:
- X if (strnicmp(str,pat->aux,strlen(pat->aux))) return FALSE;
- X str += strlen(pat->aux);
- X break;
- X case pREP :
- X return MatchRepeat(++pat,str);
- X case pANY :
- X if (!*str++) return FALSE;
- X break;
- X case pNULL :
- X break;
- X case pSUB :
- X return MatchSub((pattern *)pat->aux,pat+1,str,MatchPattern);
- X }
- X ++pat;
- X }
- X if (!*str) return TRUE;
- X return FALSE;
- X}
- X
- X/* This function matches a given string against a precompiled pattern. The
- X main function here doesn't do any actual matching; it just subdivides
- X a pattern into individual items based on any alternation used. */
- X
- XBOOL MatchPattern(pattern *pat, char *str) {
- X if (!str) return FALSE;
- X
- X while (pat && pat->type != pEND) {
- X if (MatchOne(pat,str)) return TRUE;
- X
- X while (pat->type != pALT && pat->type != pEND) ++pat;
- X if (pat->type == pALT) ++pat;
- X }
- X return FALSE;
- X}
- X
- X/* ====================================================================== */
- X
- X/* Here's a simple main program to use the pattern matcher. */
- X
- Xvoid main(int argc, char *argv[]) {
- X pattern *pat;
- X int i;
- X
- X if (setjmp(lowmem)) {
- X printf("Error: Out of memory!\n");
- X exit(10);
- X }
- X if (argc < 2) {
- X printf("Usage: %s pattern str1 [str2 ... strN]\n",argv[0]);
- X exit(0);
- X }
- X if (!(pat = CompilePattern(argv[1]))) {
- X printf("Error: Invalid pattern!\n");
- X exit(10);
- X }
- X
- X for (i = 2; i < argc; ++i) {
- X if (MatchPattern(pat,argv[i]))
- X printf("MATCH ");
- X else
- X printf("NOMATCH");
- X
- X printf(": \"%s\"\n",argv[i]);
- X }
- X}
- END_OF_FILE
- if test 10896 -ne `wc -c <'pattern.c'`; then
- echo shar: \"'pattern.c'\" unpacked with wrong size!
- fi
- # end of 'pattern.c'
- fi
- echo shar: End of archive 1 \(of 1\).
- cp /dev/null ark1isdone
- MISSING=""
- for I in 1 ; do
- if test ! -f ark${I}isdone ; then
- MISSING="${MISSING} ${I}"
- fi
- done
- if test "${MISSING}" = "" ; then
- echo You have the archive.
- rm -f ark[1-9]isdone
- else
- echo You still need to unpack the following archives:
- echo " " ${MISSING}
- fi
- ## End of shell archive.
- exit 0
- --
- Mail submissions (sources or binaries) to <amiga@cs.odu.edu>.
- Mail comments to the moderator at <amiga-request@cs.odu.edu>.
- Post requests for sources, and general discussion to comp.sys.amiga.
-